home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
MacWorld 1999 July
/
Macworld (1999-07).dmg
/
Shareware World
/
Info
/
For Developers
/
Mops 3.4.sea
/
Mops source
/
PPC source
/
cg3
< prev
next >
Wrap
Text File
|
1999-01-18
|
66KB
|
2,496 lines
marker m__cg3
PPC?
[IF]
false constant debug?
[ELSE]
false constant debug?
[THEN]
\ =========================================================
\ EQUALIZATION
\ =========================================================
\
\ See the notes in the doco section in cg1.
forward setup_normal_call
false value forward? \ true if we're compiling a FORWARD definition
false value noname? \ true if we're compiling a :NONAME defn
false value mloc? \ true if we're compiling a :MLOC
false value entry? \ true if we're compiling a :ENTRY
2 constant gpr_call_cnt \ the number of stack cells we pass in GPRs
\ for calls that don't have named parms
2 constant fpr_call_cnt \ ditto for FPRs. This is also the number of
\ floating stack cells we take to be in FPRs
\ on return, if there aren't any FP flag bytes.
6 constant max_gpr_rtn_cnt \ the max number of stack cells we leave in
\ GPRs at EXIT time. 6 is enough, since any
\ more would probably have to get pushed off
\ to mem straight away by the caller. And we
\ need at least one spare for equalization.
6 constant max_fpr_rtn_cnt \ ditto for FPRs
max_GPR_rtn_cnt
constant max_eq_cnt \ the max number of GPRs we can equalize.
\ We must have one free for copying things
\ around.
0 value gpr_rtn_cnt \ the actual no of stack cells we leave in GPRs
\ at EXIT time for the current defn. We make
\ this variable to try to minimize reg
\ spills/refills.
0 value fpr_rtn_cnt \ ditto for floating point.
1 constant fwd_gpr_rtn_cnt \ for FORWARD defns, we have to use arbitrary values
\ for the return counts, since we'll be calling them
fpr_call_cnt
constant fwd_fpr_rtn_cnt \ by using fpr_call_cnt for the FORWARD FP return
\ count, we don't need to store FP flag bytes
\ for FORWARD definitions.
0 value xalignment
0 value #extern_parm_cells \ Set when we're compiling an external call.
\ This is the number of cells needed by
\ parameters. Normally the parms must
\ start at r3 and go on up from there.
0 value #extern_FP_parms \ Likewise - this is the number of FP parms
\ (these get loaded into the FPRs).
0 value #extern_result_cells \ Likewise, for results. Must be 0 or 1.
0 value #extern_FP_results \ Likewise, for FP results. Must be 0 or 1.
0 value extern_mask \ has a 1 corresponding to each GPR that
\ gets a dummy value because the corresponding
\ parm is floating.
true value adjust_stks?
objPtr eq_regs class_is ODs_class
: refTypeErr \ called from the default clause of various case
\ statements below, that dispatch on a reference.
dup .
dup pullRef >
IF 225 \ "We can't take the address of a register object" (i.e. we
\ can't equalize against it)
ELSE 221 \ "Impossible operand!"
THEN die
;
: FPerr \ called if we're asked to equalize a GPR to an FPR
\ for some strange reason
." FP number on data stack??" cr
." cstk: " printall: cstk cr
." cstk2: " printall: cstk2 cr
." fcstk: " printall: fcstk cr
." fcstk2:" printall: fcstk2 cr
;
: ADJUST_STKS
debug? if
." adjust_stks - stk_offset " stk_offset . ." fstk_offset " fstk_offset . cr
then
otAdd 0 setLiteralOp: instrn drop
stk_offset
IF SP_reg >RA: instrn SP_reg >RD: instrn
stk_offset >lit: instrn
compile: instrn
0 -> stk_offset
THEN
fstk_offset
IF FSP_reg >RA: instrn FSP_reg >RD: instrn
fstk_offset >lit: instrn
compile: instrn
0 -> fstk_offset
THEN
;
(* GET_RTN_CNTS returns the count of stack cells in regs for an exit
from the current definition. (We also make the decision here if this
is the first time.)
For the FPRs, we now require a minimum of fpr_call_cnt. This is so
that any word which does a non-FP Toolbox call won't end up with
a zero fpr_rtn_cnt, which might force all words calling it to have
this too, and all words calling them, etc. - resulting in dozens of
words needing FP info bytes when none of them do any FP ops!
*)
: GET_RTN_CNTS ( -- gpr_rtn_cnt fpr_rtn_cnt )
debug? if
." get_rtn_cnts here:" cr
printall: fcstk
then
gpr_rtn_cnt dup 0<
IF drop
size: cstk max_gpr_rtn_cnt min
dup -> gpr_rtn_cnt
THEN
fpr_rtn_cnt dup 0<
IF drop
size: fcstk max_fpr_rtn_cnt min fpr_call_cnt max
dup -> fpr_rtn_cnt
THEN
debug? if
." get_rtn_cnts returns: " gpr_rtn_cnt . fpr_rtn_cnt . cr
then
;
\ GET_LOOP_CNTS returns the count of stack cells in regs for the
\ beginning/end of the current loop. At present we just handle
\ this by calling get_return_cnt, as it may be not worth the extra
\ complexity of having a separate count for each loop.
: GET_LOOP_CNTS ( -- gpr_loop_cnt fpr_loop_cnt )
get_rtn_cnts ;
\ When equalizing on fcstk, it's easiest to just switch fcstk to cstk and
\ fcstk2 to cstk2, do the same calls, then switch them back.
: SWITCH_CSTKS
save: cstk save: cstk2
save: fcstk restore: cstk
save: fcstk2 restore: cstk2
;
: SWITCH_BACK
save: cstk restore: fcstk
save: cstk2 restore: fcstk2
restore: cstk2 restore: cstk
;
: ALLOCATE_FROM_CSTK2 \ bumps refCnts for regs ref'd from cstk2,
\ so that if we need a free reg there won't
\ be a conflict.
size: cstk2 0
?DO i select: cstk2
allocate: cstk2
LOOP
size: fcstk2 0
?DO i select: fcstk2
allocate: fcstk2
LOOP
;
reference thisRef
: FIX_DUPS_FOR_1_REF { cell# 1stTime? \ thisReg newReg any_dups? any_cstk2_matches? -- }
debug? if
." fix_dups_for_1_ref - cell# " cell# .
printall: cstk printall: cstk2
then
false -> any_dups?
cstk ->: thisRef reg: cstk -> thisReg
cell# stk: cstk2 cstk =?: cstk2 -> any_cstk2_matches?
\ first we do a scan to see if there are any duplicates, and get out if not.
\ We look down the rest of cstk from the current cell#.
size: cstk 1+ cell# 1+
?DO i stk: cstk i stk: cstk2
cstk =?: cstk2 or> any_cstk2_matches?
cstk =?: thisRef or> any_dups?
LOOP
debug? if
." any dups of this cell? " any_dups? if ." yes" else ." no" then cr
." any cstk2 matches? " any_cstk2_matches? if ." yes" else ." no" then cr
then
any_dups? 0EXIT \ if no duplicates, there's nothing to do
\ now we fix the duplicates by moving any that don't match the corresponding
\ entries in cstk2. If there weren't any, we'll leave the first alone.
size: cstk 1+ cell#
any_cstk2_matches? NIF 1+ THEN
?DO \ again we loop over lower cstk refs, looking for the dups
i stk: cstk
cstk =?: thisRef
IF \ here's the next one
i stk: cstk2 \ does it match the corresponding cstk2 ref?
cstk =?: cstk2 \ (matching dups are OK, we can leave them)
debug? if
dup if ." matches cstk2 cell - can leave it" cr then
then
NIF \ Nope, we need to fix it. If the cstk2
\ cell is a free reg, we'll use that, otherwise
\ we find a free one.
refType: cstk2 dup GPRref = swap FPRref = or
IF reg: cstk2 dup -> newReg select: eq_regs
get: ivar> refCnt in eq_regs
NIF allocate: eq_regs true
ELSE false
THEN
ELSE false
THEN
NIF getFreeReg: eq_regs -> newReg THEN
debug? if
." fixing duplicn by moving " thisReg .
." to " newReg . cr
then
thisReg compile_reg_move: eq_regs
thisReg select: eq_regs
1stTime? IF -1 +: ivar> refCnt in eq_regs THEN
newReg >reg: cstk
debug? if
." cstk is now:" printall: cstk cr
then
THEN
THEN
LOOP
;
: (FIX_DUPLICATES) { 1stTime? \ cSiz thisTyp -- }
debug? if
." fix_duplicates called - 1stTime? " 1stTime? . cr
printall: cstk printall: cstk2
then
size: cstk -> cSiz
cSiz 2 < ?EXIT
cSiz 1+ 1
DO i stk: cstk
refType: cstk -> thisTyp
thisTyp GPRref = thisTyp FPRref = or
IF i 1stTime? fix_dups_for_1_ref THEN
LOOP
debug? if
." stacks after (fix_duplicates)" cr
printall: cstk printall: cstk2
then
;
: FIX_DUPLICATES { 1stTime? -- }
GPRs -> eq_regs
1stTime? (fix_duplicates)
FPRs -> eq_regs switch_cstks
debug? if
." now doing FPRs:" cr
then
1stTime? (fix_duplicates)
switch_back
;
0 value CONFLICT_REG
: CSTK_CONFLICT? { stkCell# -- b }
false
size: cstk 1+ 1
?DO i stk: cstk
cstk2 =?: cstk
IF i stkCell# <>
IF drop true reg: cstk -> conflict_reg LEAVE THEN
THEN
LOOP
stkCell# stk: cstk
debug? if
dup if ." cstk_conflict? finds conflict for cell " stkCell# . cr
then
then
;
: CSTK2_CONFLICT? { stkCell# -- b }
false
size: cstk2 1+ 1
?DO i stk: cstk2
cstk =?: cstk2
IF i stkCell# <> IF drop true LEAVE THEN
THEN
LOOP
stkCell# stk: cstk2
debug? if
dup if ." cstk2_conflict? finds conflict for cell " stkCell# . cr
then
then
;
: FIX_REG_CONFLICT? { stkCell# 1stTime?
\ thisReg newReg useNewReg? -- conflict_handled? }
reg: cstk -> thisReg false -> useNewReg?
stkCell# cstk_conflict? \ does cstk2 GPR conflict on cstk somewhere?
NIF false EXIT THEN \ no - nothing for us to do here
\ There's a conflict, but we might be able to handle it in the back eq step.
1stTime?
NIF \ no back eq step! We'll have to move the conflicting reg to
\ a new reg, then move this reg to the target reg. Call this
\ plan A.
true -> useNewReg?
ELSE \ There will be a back eq step, but if there would be a
\ conflict there too, we'll revert to plan A, which is no
\ worse than anything else we might do (such as moving the
\ current reg to a new one).
stkCell# cstk2_conflict? -> useNewReg?
THEN
useNewReg?
IF
getFreeReg: eq_regs -> newReg
conflict_reg newReg true moveReg: eq_regs \ move confilcting reg out
reg: cstk2 select: eq_regs
reg: cstk compile_reg_move: eq_regs
\ move source to reg which was in conflict before
\ - note we can't move by recompiling or we'll
\ run right into the conflict!
debug? if
." needed to move conflicting reg " conflict_reg .
." to new reg " newReg . cr
then
stkCell# stk: cstk stkCell# stk: cstk2 \ restore selections
cstk2 ->: cstk
ELSE \ conflict, but OK to leave for back eq step.
debug? if
." OK to leave for back eq step" cr
then
stkCell# stk: cstk stkCell# stk: cstk2 \ restore selections
THEN
true \ we've handled the conflict.
;
\ ?FIX_XX>GPR_CONFLICT is a bit like FIX_GPR_CONFLICT, but handles the case
\ where the cstk operand is a literal or CR, which is a lot simpler, since
\ we don't have the option of leaving it to the back eq step. Also
\ we don't return a flag since we don't need it - the op still has to
\ be compiled no matter what happened. Note that of course this doesn't
\ apply to FPRs.
: ?FIX_XX>GPR_CONFLICT { stkCell# 1stTime? \ newReg -- }
debug? if
." ?fix_xx>gpr_conflict - 1stTime? " 1stTime? . cr
then
stkCell# cstk_conflict? \ does cstk2 GPR conflict on cstk somewhere?
0EXIT \ no - nothing for us to do here
\ There's a conflict. We do the same as plan A above - move the conflicting reg
\ to a new reg, then compile the lit into the target reg. We could instead have
\ chosen a new reg for the literal, but on average it would make no difference.
getFreeReg: GPRs -> newReg
conflict_reg newReg true moveReg: GPRs \ move confilcting reg out
stkCell# stk: cstk stkCell# stk: cstk2 \ restore selections
debug? if
." ?fix_xx>gpr_conflict needed to move" cr
." conflicting reg " conflict_reg .
." to new reg " newReg . cr
printall: cstk cr printall: cstk2 cr
then
;
: CHECK_FOR_PULL
refType: cstk
SELECT[ noRef ]=>
[ gprRef ]=> reg: cstk >gpr: res1 true
[ fprRef ]=> reg: cstk >fpr: res1 true
[ litRef ]=> lit: cstk false lit>gpr
false
[ CRref ]=> cstk CR>GPR
false
DEFAULT=> refTypeErr
]SELECT
\ need to check for a reg conflict?
IF false
size: cstk2 1+ 1
?DO i stk: cstk2
cstk =?: cstk2 IF drop true LEAVE THEN
LOOP
IF getFreeReg: eq_regs drop
addr: ivar> myRef in eq_regs ->: res1
debug? if
." reg conflict for later pull - using new reg " reg: res1 . cr
then
reg: cstk reg: res1 false moveReg: eq_regs
THEN
THEN
res1 ->: cstk
;
false value pushPull?
false value GPR_pushes_or_pulls?
false value FPR_pushes_or_pulls?
(* set by equalize_depths, and used by equalize_refs in deciding
whether to go top-down or bottom-up. If any cells are to be
pushed or pulled, we must go top-down, or cells get mixed up!
*)
: (EQUALIZE_DEPTHS) { 1stTime? \ #toPull n -- }
(* This ensures cstk and cstk2 have the same depth before we get into the
grubby details of equalization. We also need to ensure there's at
least one free register in case we need to copy things around.
*)
debug? if
." equalize_depths called - 1stTime? " 1stTime? . cr
printall: cstk printall: cstk2 cr
then
false -> pushPull?
size: cstk2 size: cstk - -> #toPull
#toPull 0EXIT
true -> pushPull?
#toPull 0>
IF \ cstk2 is deeper. Pull cells into cstk to match.
size: cstk 1+ -> n
#toPull
FOR n stk: cstk2
moveDown: cstk \ leaves element 0 selected - the one we're
\ going to pull to
>pull: cstk
1 ++> n
NEXT
ELSE \ cstk is deeper. Now we can't pull into cstk2 here. If
\ 1stTime? is true, we can arrange to do it in the back
\ equalization step, and here we just need to ensure the
\ cstk cell types are appropriate.
\ But if 1stTime? is false, we'll have to push off the
\ excess cstk cells into memory here.
neg> #toPull
1stTime?
IF
debug? if
." will be pulling " #toPull . ." regs on back eq step" cr
then
size: cstk2 1+ -> n
#toPull
FOR n stk: cstk
check_for_pull
1 ++> n
NEXT
ELSE
#toPull \ really # to push
FOR push&moveUp NEXT
THEN
THEN
debug? if ." stacks after equalize_depths" cr
printall: cstk printall: cstk2 cr
then
;
: EQUALIZE_DEPTHS { 1stTime? -- }
GPRs -> eq_regs
1stTime? (equalize_depths)
pushPull? -> GPR_pushes_or_pulls?
FPRs -> eq_regs switch_cstks
debug? if
." now doing FPRs:" cr
then
1stTime? (equalize_depths)
pushPull? -> FPR_pushes_or_pulls?
switch_back
;
: HANDLE_SPECIAL_REGS? { stkCell# -- handled? }
(* we call this word if we have a reg-reg equalization, on the first pass if
there's going to be a back equalization. In this situation we mustn't
overwrite a special register, so this word checks if one or both regs
are special. We handle this case in a similar way to one or both
operands being literal. If we handle it, we return true.
*)
reg: cstk select: eq_regs
get: ivar> special? in eq_regs
IF reg: cstk2 select: eq_regs
get: ivar> special? in eq_regs
IF \ both regs are special
\ - we allocate a new free reg
debug? if
." both regs are special - using a new one" cr
then
reg: cstk
getFreeReg: eq_regs \ leave reg# for moveReg: below
addr: ivar> myRef in eq_regs ->: cstk
true moveReg: eq_regs true EXIT
THEN
\ cstk reg is special - we can
\ move to the cstk2 reg, unless
\ there's a conflict
debug? if
." cstk reg is special - want to move "
print: cstk ." to " print: cstk2 cr
then
stkCell# cstk_conflict?
IF \ there's a conflict
getFreeReg: eq_regs drop addr: ivar> myRef in eq_regs ->: res1
debug? if
." but there's a reg conflict - changing to reg " reg: res1 . cr
then
stkCell# stk: cstk \ in case it changed
reg: cstk reg: res1 true moveReg: eq_regs
res1 ->: cstk true EXIT
THEN
reg: cstk reg: cstk2 true moveReg: eq_regs true EXIT
THEN
reg: cstk2 select: eq_regs
get: ivar> special? in eq_regs
NIF \ neither is special
false EXIT
THEN
\ only the cstk2 reg is special. If there's no cstk2 conflict on the
\ cstk reg, we can leave it for the back eq step (there definitely should
\ be one!). Otherwise we'll have to use a new reg.
debug? if
." cstk2 reg is special" cr
then
stkCell# cstk2_conflict? NIF true EXIT THEN
debug? if
." conflict means we have to use a new reg" cr
then
reg: cstk
getFreeReg: eq_regs \ leave reg# for moveReg: below
addr: ivar> myRef in eq_regs ->: cstk
true moveReg: eq_regs
true
;
: EQUALIZE_GPR>GPR { stkCell# 1stTime? \ mustMove? -- }
false -> mustMove?
debug? if
." equalize_gpr>gpr - 1stTime? " 1stTime? . cr
then
1stTime?
IF stkCell# handle_special_regs? ?EXIT
\ if reg(s) special, handle and out
stkCell# 1stTime? fix_reg_conflict? ?EXIT
\ out if it's been handled
\ now if we're moving to a lower reg number, we'll move it now
\ - hopefully to migrate regs down, and reduce moves when we do a call, or
\ at semicolon time.
gpr: cstk gpr: cstk2 > IF true -> mustMove? THEN
ELSE
true -> mustMove?
THEN
mustMove?
NIF \ see if we can move by recompiling, or put it off in the hope
\ that we can recompile in the back eq step.
gpr: cstk gpr: cstk2
moveReg_by_recompiling?: GPRs ?EXIT \ success!
stkCell# cstk2_conflict? -> mustMove?
THEN
debug? if
mustMove? if ." moving now"
else ." leaving for back eq step"
then cr
then
mustMove?
IF stkCell# 1stTime? fix_reg_conflict? ?EXIT
gpr: cstk gpr: cstk2 false moveReg: GPRs
\ note we don't update refs here - at this
\ stage we've finalized the identities of
\ the regs we want to move and don't want
\ to alter them!
cstk2 ->: cstk
THEN
;
: (PULL) { stkCell# -- }
stkCell# stk: cstk
res1 ->: cstk
refType: res1 GPRref =
IF gpr: res1 select: GPRs SP_reg stk_offset 0 compPull: GPRs
1cell ++> stk_offset
ELSE
fpr: res1 select: FPRs FSP_reg fstk_offset 0 compPull: FPRs
8 ++> fstk_offset
THEN
;
: EQUALIZE_PULL>GPR { stkCell# -- }
gpr: cstk2 >gpr: res1
stkCell# cstk_conflict?
IF \ there's a conflict
getFreeReg: GPRs >gpr: res1
debug? if
." reg conflict for pull - changing to gpr " reg: res1 . cr
then
THEN
stkCell# (pull)
;
: PULL>NEW_GPR { stkCell# -- }
getFreeReg: GPRs >gpr: res1
debug? if
." pull>new_gpr will use gpr " reg: res1 . cr
then
stkCell# (pull)
;
: EQUALIZE_LIT>GPR { stkCell# 1stTime? \ mustMove? xx -- }
debug? if
." equalize_lit>gpr - 1stTime? " 1stTime? . cr
then
1stTime?
IF \ mustn't clobber a special reg
gpr: cstk2 select: GPRs
get: ivar> special? in GPRs
IF \ cstk2 reg is special - we have to allocate a new one
[ debug? ] [if]
." lit -> special reg - using a new one" cr
[then]
lit: cstk false lit>gpr
res1 ->: cstk EXIT
THEN
THEN
\ OK to use cstk2 reg now
stkCell# 1stTime? ?fix_xx>gpr_conflict
gpr: cstk2 select: GPRs
otFetch put: ivar> opType in GPRs
clear: ivar> A_opnd in GPRs
lit: cstk >lit: ivar> B_opnd in GPRs
compile: GPRs
current: GPRs >gpr: cstk
;
: EQUALIZE_CR>GPR { stkCell# 1stTime? \ mustMove? -- }
1stTime?
IF \ mustn't clobber a special reg
gpr: cstk2 select: GPRs
get: ivar> special? in GPRs
IF \ cstk2 reg is special - we have to allocate a new one
debug? if
." CR -> special reg - using a new one" cr
then
cstk CR>GPR
free: cstk res1 ->: cstk EXIT
THEN
THEN
\ OK to use cstk2 reg now
stkCell# 1stTime? ?fix_xx>gpr_conflict
cstk gpr: cstk2 cr>this_gpr \ also frees the CR ref
cstk2 ->: cstk
;
: EQUALIZE_FPR>FPR { stkCell# 1stTime? \ mustMove? -- }
false -> mustMove?
debug? if
." equalize_fpr>fpr - 1stTime? " 1stTime? . cr
then
1stTime?
IF stkCell# handle_special_regs? ?EXIT
\ if reg(s) special, handle and out
stkCell# 1stTime? fix_reg_conflict? ?EXIT
\ out if it's been handled
\ now if we're moving to a lower reg number, we'll move it now
\ - hopefully to migrate regs down, and reduce moves when we do a call, or
\ at semicolon time.
fpr: cstk fpr: cstk2 > IF true -> mustMove? THEN
ELSE
true -> mustMove?
THEN
mustMove?
NIF \ see if we can move by recompiling, or put it off in the hope
\ that we can recompile in the back eq step.
fpr: cstk fpr: cstk2
moveReg_by_recompiling?: FPRs ?EXIT \ success!
stkCell# cstk2_conflict? -> mustMove?
THEN
debug? if
mustMove? if ." moving now"
else ." leaving for back eq step"
then cr
then
mustMove?
IF stkCell# 1stTime? fix_reg_conflict? ?EXIT
fpr: cstk fpr: cstk2 false moveReg: FPRs
\ note we don't update refs here - at this
\ stage we've finalized the identities of
\ the regs we want to move and don't want
\ to alter them!
cstk2 ->: cstk
THEN
;
: EQUALIZE_PULL>FPR { stkCell# -- }
fpr: cstk2 >fpr: res1
stkCell# cstk_conflict?
IF \ there's a conflict
getFreeReg: FPRs >fpr: res1
debug? if
." reg conflict for pull - changing to fpr " reg: res1 . cr
then
THEN
stkCell# (pull)
;
: PULL>NEW_FPR { stkCell# -- }
getFreeReg: FPRs >fpr: res1
debug? if
." pull>new_fpr will use fpr " reg: res1 . cr
then
stkCell# (pull)
;
: AVOID_SPECIAL_GPR { \ newReg -- }
gpr: cstk select: GPRs
get: ivar> special? in GPRs
IF
getFreeReg: GPRs -> newReg
[ debug? ] [if]
." cstk gpr is special - moving to a free one: " newReg . cr
[then]
reg: cstk newReg false moveReg: GPRs
newReg >gpr: cstk
THEN
;
: EQUALIZE_1_REF_PAIR { stkCell# 1stTime? \ litVal -- }
debug? if
." equalize_1_ref_pair" cr print: cstk print: cstk2 cr
[ ppc? ] [if] dbgr [then]
then
cstk =?: cstk2 ?EXIT \ if already equal, nothing to do
\ now we just enumerate all the combinations. This is a bit long-winded,
\ but each combination is simple enough.
refType: cstk2
SELECT[ noRef ]=> \ we assume this is going to be handled on the back eq step
[ gprRef ]=>
refType: cstk
SELECT[ gprRef ]=>
stkCell# 1stTime? equalize_gpr>gpr
[ litRef ]=>
stkCell# 1stTime? equalize_lit>gpr
[ fprRef ]=> FPerr
[ CRref ]=> \ cstk cstk2 CR>GPR
stkCell# 1stTime? equalize_cr>gpr
\ cstk2 ->: cstk
[ pullRef ]=> stkCell# equalize_pull>gpr
DEFAULT=> refTypeErr
]SELECT
[ fprRef ]=>
refType: cstk
SELECT[ fprRef ]=> stkCell# 1stTime? equalize_fpr>fpr
[ pullRef ]=> stkCell# equalize_pull>fpr
DEFAULT=> fpErr
]SELECT
[ crRef ]=>
refType: cstk
SELECT[ gprRef ]=> 1stTime?
IF \ we'll handle in back eq step
\ - we just have to check the
\ gpr is OK to clobber.
avoid_special_gpr
ELSE db $ 999 $ deadbeef
THEN
[ litRef ]=> \ we get the lit to a GPR, then back eq
\ will fix
lit: cstk false lit>gpr
res1 ->: cstk
[ fprRef ]=> FPerr
[ CRref ]=> \ we have to do a CR->CR move here.
cstk cstk2 move_CR_bit
cstk2 ->: cstk
[ pullRef ]=> stkCell# pull>new_gpr
DEFAULT=> refTypeErr
]SELECT
[ litRef ]=>
refType: cstk
SELECT[ gprRef ]=> avoid_special_gpr
[ litRef ]=> lit: cstk lit: cstk2 <>
IF \ we only need to do anything if they're
\ different, in which case we have to
\ load into a gpr. lit>gpr looks after this.
lit: cstk false lit>gpr
res1 ->: cstk
THEN
[ fprRef ]=> FPerr
[ CRref ]=>
cstk cr>gpr
res1 ->: cstk
[ pullRef ]=>
stkCell# pull>new_gpr
DEFAULT=> refTypeErr
]SELECT
DEFAULT=> refTypeErr
]SELECT
;
: (EQUALIZE_REFS) { pushes/pulls? 1stTime? \ #cells n bottom_up? -- }
debug? if
." equalize_refs called:" cr
printall: cstk printall: cstk2 cr
then
false -> bottom_up?
size: cstk -> #cells
#cells
IF
\ if we're going to be moving regs downwards, it's more
\ advantageous to start at the bottom and go up - but if we're
\ doing any pushes or pulls, we have to go top-down no matter what.
pushes/pulls?
NIF #cells 2 >=
IF 1 stk: cstk 1 stk: cstk2
refType: cstk gprRef =
refType: cstk2 gprRef = and
IF reg: cstk reg: cstk2 > -> bottom_up?
THEN
THEN
THEN
debug? if
." bottom_up? " bottom_up? if ." true" else ." false" then cr
then
bottom_up?
IF #cells ELSE 1 THEN -> n
#cells
FOR n stk: cstk n stk: cstk2
n 1stTime? equalize_1_ref_pair
bottom_up? IF 1 --> n ELSE 1 ++> n THEN
NEXT
THEN
debug? if
." stacks after equalize_refs" cr
printall: cstk printall: cstk2
then
;
: EQUALIZE_REFS { 1stTime? -- }
GPRs -> eq_regs
GPR_pushes_or_pulls? 1stTime? (equalize_refs)
FPRs -> eq_regs switch_cstks
debug? if
." now doing FPRs:" cr
then
FPR_pushes_or_pulls? 1stTime? (equalize_refs)
switch_back
;
: EQUALIZE_FOR_CONDITIONAL { branchCDP \ locBrCDP destCDP -- }
CDP +L: eq_ranges
true -> equalizing?
GPRs -> eq_regs
adjust_stks \ it's probably been called already, but
\ this won't hurt
branchCDP 4+ -> basic_block_start \ BB starts straight after the branch
\ ." now in equalize_for_conditional" cr .s dbgr
restore: fcstk2 restore: cstk2 \ get saved cstk and fcstk to cstk2 & fcstk2
\ .s
save: cstk2 save: fcstk2 \ and copy to cstk2_orig and fcstk2_orig
restore: fcstk2_orig restore: cstk2_orig \ in case
\ cstk2/fcstk2 get changed - although I
\ think they probably shouldn't
debug? if
." equalize_for_conditional called:" cr
printall: cstk printall: cstk2 printall: fcstk printall: fcstk2
then
allocate_from_cstk2
true fix_duplicates
true equalize_depths
true equalize_refs
adjust_stks
\ now we compile a branch over the back equalization code we're about to
\ generate:
CDP -> locBrCDP
compile_unconditional_branch
save: cstk restore: cstk2
save: cstk2_orig restore: cstk
save: fcstk restore: fcstk2
save: fcstk2_orig restore: fcstk
debug? if ." stacks before back equalizing:" cr
printall: cstk printall: cstk2 printall: fcstk printall: fcstk2
then
true -> eq_block_recompiling_move? \ mustn't monkey with reg moves during
\ back equalization!
false fix_duplicates
false equalize_depths
false equalize_refs
adjust_stks
\ now, did we actually compile anything in the back equalization?
CDP 4- locBrCDP =
IF \ no - wipe out the uncond branch altogether, and
[ debug? ] [if]
." nothing compiled in back eq - deleting the branch" cr
[then]
4 --> CDP
CDP \ present posn is dest of main branch
ELSE \ yes - resolve the uncond branch over the
\ back eq code
debug? if
." compiled some code in back eq step" cr
then
locBrCDP CDP resolve_branch
locBrCDP 4+ \ back eq code start is dest of main branch
THEN
-> destCDP
\ now we resolve the main branch, to the dest we worked out above:
branchCDP destCDP resolve_branch
update_refcnts
debug? if
." end of equalize_for_conditional - final stacks (should be the same):" cr
printall: cstk printall: cstk2
then
false -> eq_block_recompiling_move?
CDP +L: eq_ranges
false -> equalizing?
;
\ =========================================================
\ SIMPLE_EQUALIZE
\ =========================================================
: SIMPLE_EQUALIZE { gpr_cnt fpr_cnt -- }
true -> equalizing?
GPRs -> eq_regs
debug? if
." simple_equalize called - cstk:" cr printall: cstk
." gpr_cnt:" gpr_cnt . cr
." fpr_cnt:" fpr_cnt . cr
." CDP:" CDP .h cr
then
fpr_cnt -1 = IF fpr_call_cnt -> fpr_cnt THEN \ -1 means use the default
gpr_cnt fpr_cnt setup_normal_call
debug? if
." stacks set up for equalization:" cr
printall: cstk printall: cstk2
then
false fix_duplicates
false equalize_depths
false equalize_refs
update_refcnts
adjust_stks? IF adjust_stks ELSE true -> adjust_stks? THEN
debug? if
." final stacks - should be the same:" cr
printall: cstk printall: cstk2
then
set_backstop_CDP \ the places where we use simple_equalize
\ all require something this
false -> equalizing?
;
\ =========================================================
\ LOOP EQUALIZATION
\ =========================================================
\ PREPARE_FOR_LOOP is called from <mark when we're setting up a loop.
: PREPARE_FOR_LOOP
debug? if
." prepare_for_loop called - calling equalize_for_call to set up:" cr
then
(* We could define a separate "loop count" for each loop - the number of
cells in regs that we're going to equalize to. But this might be
overkill, and certainly would complicate LEAVE (which would have to
keep track of the appropriate count for the innermost containing
DO loop, not the innermost containing loop). So at present we'll
just use the rtn_cnt mechanism, which is simple, and probably
nearly as good anyway (especially if definitions are short, in which case
we probably won't have done an EXIT yet, so return_cnt will be set
to whatever the depth is here at the loop start).
*)
get_rtn_cnts simple_equalize
adjust_stks
CDP -> basic_block_start CDP -> loop_start
;
: UPDATE_STORES
31 0
DO i select: stored_GPRs
get: ivar> opType in stored_GPRs otStore =
IF i select: GPRs
get: ivar> opCDP in GPRs
put: ivar> lastRefCDP in stored_GPRs
THEN
LOOP
31 0
DO i select: stored_FPRs
\ get: ivar> opType in stored_GPRs otFPstore = \ a BUG, surely!
get: ivar> opType in stored_FPRs otFPstore =
IF i select: FPRs
get: ivar> opCDP in FPRs
put: ivar> lastRefCDP in stored_FPRs
THEN
LOOP
;
: HOIST_INVARIANTS
debug? if
." hoisting invariants to before loop" cr
then
hoist? 0EXIT \ bail out if we've disabled hoisting
?hoist_all: GPRs
?hoist_all: FPRs
update_stores
debug? if
." hoisting stored_GPRs" cr
printall: stored_GPRs
then
?hoist_all: stored_GPRs
\ ?hoist_all: CRs \ very dubious about this! Note if we ever do
\ it we must exclude CR0 since that's set as a
\ side effect, and all its OD fields won't
\ be set up!
;
: EQUALIZE_FOR_LOOP { markCDP -- }
debug? if
." equalize_for_loop called:" cr
printall: cstk printall: cstk2 printall: fcstk printall: fcstk2
then
true -> equalizing?
GPRs -> eq_regs
adjust_stks
markCDP -> basic_block_start \ prepare_for_loop should have already
\ done this, but let's be sure.
restore: fcstk2 restore: cstk2
allocate_from_cstk2
false fix_duplicates
false equalize_depths
false equalize_refs
update_refcnts
adjust_stks
debug? if ." final stacks - should be the same:" cr
printall: cstk printall: cstk2
then
false -> equalizing?
;
\ =========================================================
\ CALL EQUALIZATION, including PROLOG and EPILOG
\ =========================================================
0 value svFramesize
0 value sv#gprs
0 value sv#fprs
\ 0 value savedRegs
false value dont_save_r20?
0 value #xs_parms
0 value frame_offs
\ SP_reg 16 << \ can't do compile-time ops when target compiling
$ 120000 constant SP<<16
\ RP_reg 16 <<
$ 110000 constant RP<<16
\ These words are factored out of compile_prolog and compile_epilog
\ basically to shorten the former, which is quite long enough already.
\ These defns are only called from there.
: move_regs { #regs instrn decrement \ reg# -- }
#regs 0EXIT
31 -> reg#
#regs
FOR \ for each reg to be saved:
decrement ++> frame_offs
instrn RP<<16 or frame_offs or
reg# 21 << or code, \ stw reg, n(RP)
-1 ++> reg#
NEXT
;
ppc?
[if]
: move_vrs { instrn -- }
#VL 0EXIT
$ 39800000 RP<<16 or
frame_offs 16 - or
code, \ addi r12, RP, offset
#VL FOR
instrn ( $ 7C0061CE ) 31 i - 21 << or code, \ stvx vn, 0, r12
i IF
$ 3980FFF0 12 16 << or code, \ addi r12, r12, -16
THEN
-16 ++> frame_offs
NEXT
;
[then]
objPtr parmsRegs class_is ODs_class
: move_parms { #parms call_cnt decrement stk_offs stk_reg gprs?
\ srcReg# dstReg# #parms2move n -- }
#parms 0EXIT
31 -> dstReg#
\ if it's a forward defn, we may have to pull some parms from memory, since
\ only call_cnt cells are in regs.
forward?
IF #parms call_cnt >
IF #parms call_cnt - -> n \ number to pull
n decrement * ++> stk_offs
stk_offs \ save for adjustment
n FOR
dstReg# select: parmsRegs
decrement --> stk_offs
stk_reg stk_offs 0 compPull: parmsRegs
-1 ++> dstReg#
NEXT
gprs? IF -> stk_offset ELSE -> fstk_offset THEN
adjust_stks
call_cnt \ the number for the reg -> reg moves below
ELSE
#parms
THEN
ELSE
#parms
THEN
-> #parms2move
(* Now we initialize srcReg# to the first reg we move. Our current
policy is that we don't want the number of regs in use to get
too low, which would increase the number of mem fetches. So if
the number of parms is less than a minimum (given by call_cnt)
we make up the number with cached stack cells. So with call_cnt
currently 2, this means that if there's only one named integer
parm, we pass it in r4, not r3, and leave the next stack cell in
r3. But if this is a callback or a shared library entry, we
mustn't do this, but simply start the parms in r3/f1.
*)
entry?
IF 0
ELSE call_cnt #parms2move - 0 max
THEN
gprs? if 3 else 1 then + -> srcReg#
#parms2move
FOR srcReg# dstReg# true moveReg: parmsRegs
1 ++> srcReg# -1 ++> dstReg#
NEXT
;
(* We call COMPILE_PROLOG to compile a prolog at the start of a definition.
Our prologs are a bit different to the standard PPC convention,
although they do much the same things.
1. We use the return stack for a frame since we have to leave the data
stack untouched.
2. We don't use the standard linkage area format since we don't need to.
In particular we save the link register in the newly created frame
rather than the caller's frame (which mightn't be pointed to on entry
by RP anyway, since the caller might have done >R or something first).
This also means that we can load the LR as early as possible in the
return sequence, so that maybe the blr won't stall.
The frame format is (going left to right, or UP in memory):
saved LR | saved r20 | temp object block | saved gprs | saved fprs |
The saved LR is omitted for leaf calls, and r20 (the obj base reg) is
only saved for method calls. Also we only allocate a temp object block
if we need to. These items are simply omitted from the frame if not
used - they're not left empty; they're completely omitted.
This gives us a chance of not having a frame at all if we don't
need it.
If we have temp objects, we always need a frame, so in this case
we simplify things a bit by leaving a fixed space of 16 bytes above
RP before the temp object block starts. This preserves alignment
on the temp obj block, and is enough space whatever else we're doing
in the frame. It even works when there are vectors (see below).
So we know the size of the temp obj block, zClass leaves the size
in tempObj_block_size. This is the sum of the sizes of the temp
objects and their headers. (The temp objects are treated by zClass
as ivars of class Dummy, but there's no "object header" for Dummy
itself since it isn't a real class and its info isn't even around
when the temp objects are initialized.)
Also note we have to make sure any FPRs we copy to/from the frame are
8-byte aligned. To ensure this, we always 8-byte align RP, and save
the FPRs rightmost. Also the temp object block is 8-byte aligned
(since it starts 16 bytes from RP), so our ivar alignment scheme will
guarantee that any Float temp objects will be aligned.
Vectors complicate things. They have to be 16-byte aligned. So if
there are vectors, either in the temp object block or needing to be
saved/restored, we use a different frame format, which is 16-byte
aligned even though the RP is only guaranteed to be 8-byte aligned
before the frame is set up. This "xaligned" (extra aligned) format
takes a few extra instructions to set up, so we only use it if we
have to. We can also use it in future if any other PowerPC extensions
turn out to need alignment greater than 8-byte.
Here's the xaligned frame format:
saved RP | saved LR | saved r20 | spare | temp object frame |
saved gprs | saved fprs | saved vrs |
Note the temp object block comes in its usual place, 16 bytes above RP, but
is now 16-byte aligned since RP is. Also note that since we make sure
the saved vrs are 16-byte aligned, the saved fprs are guaranteed to be
8-byte aligned as required.
COMPILE_PROLOG compiles code which allocates the frame on the return stack,
saves LR and the other relevant regs, then moves the top stack cells to the
named parm regs.
On entry CDP points to the beginning of the defn.
Leaf procs are a special case - not only don't we save LR, we do the prolog
work in the caller, not the callee. In this case saveLR? will be false.
The reason for this is that we can sometimes consolidate register
saving/restoring if the current routine has less locals than a routine
it's calling - we can save the "extra" regs as if they were locals
belonging to the caller, not the callee, and so not have to do it on
each call to the callee. But this scheme does take a bit more space,
which is why we currently only do it for leaf calls. It could easily
be extended if needed.
I did try an idea to not bother saving uninitialized locals, and it
worked, but I don't want to encourage leaving locals uninitialized so
I removed it!
If it's a method, we save off the object base reg (r20), then move
r12 to r20 (the caller will have put the new obj's addr in r12).
*)
\ setup_regs_for_prolog is factored out of compile_prolog even though
\ it's only called once, because compile_prolog is already too long!
: setup_regs_for_prolog { #gprs #parms #fprs #fparms meth? callingLeaf?
\ srcReg# dstReg# #parms2move #fparms2move n -- }
\ first we compile code to save regs - vrs, then fprs, then gprs.
[ ppc? ] [if]
$ 7C0061CE move_vrs \ stvx vn, 0, r12 - save VRs if necessary
[then]
#fprs $ D8000000 -8 move_regs \ save FPRs
#gprs $ 90000000 -4 move_regs \ save GPRs
\ now we compile code to move the parms over. Note we're only
\ allowing a max of 8 integer parms at present, though some suitably
\ modified version of the forward defn code below would allow more.
GPRs -> parmsRegs
#parms gpr_call_cnt 1cell stk_offset SP_reg true move_parms
#fprs 0>=
IF FPRs -> parmsRegs
#fparms fpr_call_cnt 8 fstk_offset FSP_reg false move_parms
THEN
(* ***
\ $$$$ testing here -- see if I can initialize locals to zero.
#gprs #parms - 0
?DO
$ 38000000 32 #gprs - i + 21 << or code,
LOOP
#fprs #fparms - 0
?DO
$ FC007090 32 #fprs - i + 21 << or code,
LOOP
\ $$$$ end test
*** *)
[ ppc? ] [if] \ in target compilation we don't use const data
\ pointers or temp objects
CD_gpr#
IF
CDP
3 FOR nop, NEXT \ nops for padding in case needed
-> CDP
CD_gpr# select: GPRs
CD_gpr# 0 -> CD_gpr#
\ const_data_start
CD_GPR_loc
b&d >blit: GPRs noRef >Atype: GPRs
otFetch put: ivar> opType in GPRs compile: GPRs
>Agpr: GPRs
-> CD_gpr#
CD_gpr# >Bgpr: GPRs
otAdd put: ivar> opType in GPRs clear: ivar> subtype in GPRs
compile: GPRs
THEN
(* Finally, if there are temp objects, we have to set up the frame pointer.
This will point to the dummy "object" in the frame, whose ivars are the
temp objects. We use an internal local variable as the frame pointer -
its gpr# is in TO_gpr# (and in zClass we patch locreg to identify itself
as this gpr).
The frame pointer will contain the new RP value plus an offset -
we use an offset of 16 in all situations (see earlier comment).
*)
callingLeaf? ?EXIT \ no temp objects in leaf calls
tempObj_block_size 0EXIT \ out if there aren't any
TO_gpr# -> dstReg#
$ 38000000 dstReg# 21 << or RP<<16 or
16 or code, \ addi reg#, RP, 16
[then]
;
: make_leaf_frame { meth? origFramesize framesize offs \ frameDone? -- }
false -> frameDone?
meth?
IF (* it's a method, so we may need to save r20, and in this
case we can also allocate the frame via stwu, since without
a saved LR, the saved r20 will be leftmost in the frame.
The only situation where we don't need to do save r20 is when
the new value of r20 is r20 itself. This will happen in the
case of a bind to self if there's no embedded object offset.
The new addr to put into r20 is ref'd by the top of cstk.
*)
1 operands
refType: opnd1 gprRef =
IF reg: opnd1 20 = -> dont_save_r20? THEN
dont_save_r20?
IF \ we only just decided, so we have to adjust the
\ frame. If the original framesize wasn't 8-byte
\ aligned, we assume it was 4-byte aligned, and
\ go back by 8 bytes. The main reason we bother
\ with this adjustment is that sometimes the frame
\ will disappear altogether.
origFramesize 7 and
IF 8 ++> offs -8 ++> frameSize -8 ++> svFrameSize
-8 ++> frame_offs
THEN
ELSE \ we compile the code to save r20. The saved
\ r20 goes leftmost in the frame.
20 select: GPRs
CDP put: ivar> lastRefCDP in GPRs \ we're about to ref it
$ 94000000 RP<<16 or obj_base_reg 21 << or
offs or code, \ stwu r20, -framesize(RP)
true -> frameDone?
opnd1 20 get_to_this_gpr
THEN
THEN
\ now if we need a frame and we haven't created it already, we
\ do it here:
framesize frameDone? not and
IF $ 38000000 RP_reg 21 << or RP<<16 or
offs or code, \ allocate frame by compiling
THEN \ addi RP, RP, -framesize
\ true -> frameDone?
;
: make_xaligned_frame { meth? offs -- }
$ 7E208B78 code, \ mr r0, RP
$ 3A310100 offs or code, \ addi RP, -framesize
$ 56310000 \ rlwinm RP, RP, 0, 0, 31-xalignment
31 xalignment - 2* or code,
$ 90110000 code, \ stw r0, (RP)
LR>r0 code, \ mflr r0
$ 90110004 code, \ stw r0, 4(RP)
meth?
IF \ we need to save r20 in the frame and then
\ copy r12 to r20 (the new obj base addr).
\ The saved r20 goes straight after the saved
\ LR in the frame - offset 8.
local?
IF true -> dont_save_r20?
ELSE
$ 90000008 RP<<16 or
obj_base_reg 21 << or code, \ stw r20, 8(RP)
$ 7D946378 code, \ mr r20, r12
THEN
THEN
;
: make_nonleaf_frame { meth? offs -- }
LR>r0 code, \ mflr r0
$ 94000000 RP<<16 or
offs or code, \ stwu r0, -framesize(RP)
\ (this saves LR and allocates the frame
\ in one hit)
meth?
IF \ we need to save r20 in the frame and then
\ copy r12 to r20 (the new obj base addr).
\ The saved r20 goes straight after the saved
\ LR in the frame - offset 4.
local?
IF true -> dont_save_r20?
ELSE
$ 90000004 RP<<16 or
obj_base_reg 21 << or code, \ stw r20, 4(RP)
$ 7D946378 code, \ mr r20, r12
THEN
THEN
;
: COMPILE_PROLOG { #gprs #parms #fprs #fparms callingLeaf? meth?
\ origFramesize framesize n offs -- }
false -> dont_save_r20?
local?
IF 0
ELSE
#gprs 0 max -> #gprs \ just in case
#fprs 0 max -> #fprs
#gprs cells #fprs 3 << + #VL 4 << +
THEN
tempObj_block_size ?dup
IF \ we have a temp object frame - in this case we always
\ use an offset of 16 above RP (see earlier comment).
+ 16 +
ELSE
callingLeaf?
NIF cell+ THEN \ if not a leaf call, we save LR
meth?
local? not and IF cell+ THEN \ if method, we normally save r20
THEN
dup -> origFramesize \ save unaligned framesize
\ now we align framesize to whatever we need to align it to - at least 8-byte
\ (2**3) alignment, but maybe more. Currently the max is 16.
3 xalignment max #align_2**n
-> framesize
framesize negate $ ffff and -> offs
framesize -> svFramesize \ for compile_epilog
framesize -> frame_offs \ initial offset for storing into frame (from right)
\ *** Note - assumes frame is already allocated
\ when frame_offs is used - frame_offs
\ must be positive!! ***
#gprs -> sv#gprs
#fprs -> sv#fprs
callingLeaf?
IF
meth? origFramesize framesize offs make_leaf_frame
ELSE \ not a leaf, and we're compiling the prolog.
CDP dup -> basic_block_start -> backstop_CDP
xalignment 3 >
IF
meth? offs make_xaligned_frame
ELSE
meth? offs make_nonleaf_frame
THEN
THEN
local? ?EXIT \ if an internal defn in a local section, we're done
#gprs #parms #fprs #fparms meth? callingLeaf? setup_regs_for_prolog
;
: COMPILE_EPILOG { callingLeaf? meth? \ reg# n -- }
callingLeaf?
NIF
xalignment 3 >
IF $ 80000004
ELSE $ 80000000
THEN
RP<<16 or code, \ lwz r0, (RP) - or 4(RP) if this is an xaligned frame
r0>LR code, \ mtlr r0
THEN
svFrameSize -> frame_offs
meth?
IF \ it's a method, so we may have saved r20. If we're calling a leaf,
\ it's at 0(RP), otherwise at 4(RP) with the saved LR at 0(RP).
dont_save_r20?
NIF $ 80000000 RP<<16 or 4 callingLeaf? not and or
obj_base_reg 21 << or code, \ lwz r20, 0/4(RP)
THEN
THEN
local?
NIF \ we compile code to restore regs - vrs, then fprs, then gprs.
[ ppc? ] [if]
$ 7C0060CE move_vrs \ lvx vn, 0, r12 - restore VRs if nec
[then]
sv#fprs $ C8000000 -8 move_regs \ restore FPRs
sv#gprs $ 80000000 -4 move_regs \ restore GPRs
THEN
xalignment 3 >
IF
$ 82310000 code, \ lwz RP, (RP) - gets rid of xaligned frame
EXIT
THEN
svFramesize
IF $ 38100000 RP_reg 21 << or RP<<16 or
svFramesize or code, \ addi RP, RP, framesize
\ - gets rid of normal frame
THEN
;
false value cLeaf? \ global because we need them after the call
false value cMeth? \ - see call_h in ppc5
:f SETUP_NORMAL_CALL { #gprs #fprs -- }
debug? if
." setup_normal_call called with " #gprs . #fprs . cr
." calling a method? " cmeth? if ." yes" else ." no" then cr
printall: cstk cr
then
\ First, bitter experience shows, for equalization to work, we MUST
\ have a few free regs!!
size: cstk 7 >= IF spill THEN
cMeth?
IF \ method call - top of cstk is obj base addr, which we have
\ to put in r12 for the call. We CAN allow recompiling
\ of a reg move here, so we temporarily restore the old
\ value of basic_block_start.
1 operands
debug? if
." obj base addr to be moved to r12: " print: opnd1
then
opnd1 12 get_to_this_gpr
THEN
0 >size: cstk2
#gprs 0
?DO i 3+ >gpr: res1
res1 push: cstk2
LOOP
#fprs 0< IF ." oh no!!" 1 die THEN
0 >size: fcstk2
#fprs 0
?DO i 1+ >fpr: res1
res1 push: fcstk2
LOOP
;f
: SETUP_FAST_CALL { c#P c#PL c#FP c#FPL \ reg# -- }
(* called for calls to leaf routines. We do the "prolog" work here in the
caller instead.
*)
debug? if
." setting up fast call" cr
." calling a method? " cmeth? if ." yes" else ." no" then cr
." c#P " c#P . ." c#PL " c#PL . ." c#FP " c#FP . ." c#FPL " c#FPL . cr
then
\ first we save the regs that the caller will use, and that we haven't already
\ saved. We do this by calling compile_prolog with the appropriate reg
\ counts. We pass zero for the number of parms, since we handle those
\ separately below.
#PL \ the number of GPRs we're using
[ ppc? ] [if]
CD_gpr# 0= - \ if we haven't allocated a const data pointer
\ yet, we might still be going to do it later
\ in the defn, so to be safe we have to
\ assume we're going to need the extra GPR.
[then]
c#PL min \ that's the final number of GPRs to save
0 \ pass zero as #parms
#FPL c#FPL min \ the number of FPRs to save
0 \ pass zero as #fparms
true cMeth? compile_prolog
0 >size: cstk2 0 >size: fcstk2
\ now we handle the parms. First we look after any stack cells that have to
\ go to regs - this will only happen if our default gpr_call_cnt/fpr_call_cnt
\ is greater than the number of named parms of that type.
gpr_call_cnt c#P - 0 max 0
?DO
i 3+ >GPR: res1
res1 push: cstk2
LOOP
c#FP 0>=
IF fpr_call_cnt c#FP - 0 max 0
?DO
i 1+ >FPR: res1
res1 push: fcstk2
LOOP
THEN
\ now we look after the parms themselves - we set up for them to go straight to
\ their ultimate destination regs.
c#P
IF 31 -> reg#
c#P
FOR reg# >GPR: res1 res1 push: cstk2
1 --> reg#
NEXT
THEN
c#FP 0>
IF 31 -> reg#
c#FP
FOR reg# >FPR: res1 res1 push: fcstk2
1 --> reg#
NEXT
THEN
;
: setup_with_gpr_mask { #fprs -- }
debug? if
cr ." handle_gpr_mask called, with mask " extern_mask .h cr
then
0 >size: cstk2
#extern_parm_cells 0
?DO
extern_mask dup $ 8000 and
NIF \ OK, we include this one
i 3+ >gpr: res1
res1 push: cstk2
THEN
1 << $ FFFF and -> extern_mask
LOOP
0 -> extern_mask \ just in case
#fprs 0< IF ." oh no!!" 1 die THEN
0 >size: fcstk2
#fprs 0
?DO i 1+ >fpr: res1
res1 push: fcstk2
LOOP
debug? if
." cstk2 and fcstk2 now set up."
printall: cstk printall: cstk2 printall: fcstk printall: fcstk2
then
;
0 value extern_fp_test
: SETUP_EXTERN_CALL
{ \ #parm_bytes #xs_bytes #xs_not_in_regs sys_SP_offs n -- }
\ For a general desription of what we're doing here, see CALL_EXTERN
\ in cg5. We give the nuts and bolts detail here.
\ debug? if
extern_fp_test if
." setting up external call" cr
." #extern_parm_cells " #extern_parm_cells . cr
." #extern_result_cells " #extern_result_cells . cr
." #extern_FP_parms " #extern_FP_parms . cr
." #extern_FP_results " #extern_FP_results . cr
." cstk: " printall: cstk
cr ." stk_offset " stk_offset . cr
then
0 -> sys_SP_offs 0 -> #xs_not_in_regs
(* First we find the number of "excess parm bytes" - that is, the number
of bytes in the parm area which won't fit in the 8 GPRs available
for integer parms.
*)
#extern_parm_cells cells -> #parm_bytes
#extern_parm_cells 8 - 0 max -> #xs_parms
#xs_parms cells -> #xs_bytes
(* Now we have 2 cases to consider:
1. #xs_parms = 0. In this case the parms will all be in regs. We
don't have to worry about any memory-based parms.
2. #xs_parms > 0. In this case we need to have #xs_parms parm cells
in the parm area of the frame. Some of these might already be
in regs, the others will already be in memory. But they'll be
in the wrong order. Later parms will be lower in memory, while
in the parm area later parms have to be higher in memory.
So we won't try to use the mem area where the parm cells already
are, but use the dummy frame, since we can change the order
while we're moving the parms over.
Then finally we have to get the first 8 parm cells into the regs.
We do this with a slightly kludged form of our normal equalization.
So in both cases we maintain our dummy frame. We don't need to move
the sys_SP (r1). There's also no need to move our own SP, since it's
irrelevant to the called routine and will be preserved. We set
adjust_stks? false so that equalize_for_call (where we've been called
from) won't alter it.
*)
#xs_parms
IF size: cstk cells negate 24 -
stk_offset + -> sys_SP_offs
(* Now we have to store off any of the excess parms which happen to be
in regs (or literals on cstk). This is easiest if we go from the
top of cstk downwards, i.e. we start at the end of the parm list and
come backwards. The calling convention dictates that we store these
parms from the high-addr end of the parm area and come downwards in
memory. Our initial SP offset is thus the number of parm bytes, plus
the size of the linkage area (24), minus 4 so that we're looking
at the last parm instead of just past it.
*)
size: cstk #xs_parms min -> n \ n = no of xs parms in regs
#parm_bytes 20 + -> sys_SP_offs
#xs_parms n - -> #xs_not_in_regs \ we need this shortly
n 1+ 1
?DO i stk: cstk
cstk sys_SP_reg sys_SP_offs false push_to_mem
1cell --> sys_SP_offs
LOOP
size: cstk n - >size: cstk
THEN
(* now if there are any xs parms which weren't originally in regs, we
have to move them over. sys_SP_offs is OK already. The initial
SP offset is already in stk_offset, since we start from the
latest parm that we pushed onto the mem part of our data stack.
As we move the parms, we increment stk_offset since when we
finish the loop, we need stk_offset pointing to the next parm up,
which will be the first to be loaded into a reg by the equalization.
Note that we go UP in memory in the data stack, and continue coming
DOWN in the parm area.
*)
#xs_not_in_regs 0>
IF 0 select: GPRs
#xs_not_in_regs
FOR
SP_reg stk_offset 0 compPull: GPRs
sys_SP_reg sys_SP_offs false compPush: GPRs
1cell ++> stk_offset 1cell --> sys_SP_offs
NEXT
THEN
\ Now we set up cstk2 ready for equalization.
extern_mask
IF \ there are FP parms, so the setup is special
#extern_FP_parms setup_with_gpr_mask
ELSE
#extern_parm_cells 8 min
#extern_FP_parms
setup_normal_call
THEN
debug? if
." # extern parm cells " #extern_parm_cells . cr
." # excess parm cells " #xs_parms . cr
." # excess parm cells not in regs " #xs_not_in_regs . cr
." #extern_FP_parms " #extern_FP_parms . cr
." ready to equalize:" cr
printall: cstk printall: cstk2
printall: fcstk printall: fcstk2
then
false -> adjust_stks?
update_refcnts
;
: EQUALIZE_FOR_CALL { xt \ c#P c#PL c#FP c#FPL float? cEntry? hndlr -- xt' }
(* xt will be zero if we're not actually calling - such as
at EXIT. It will be 1 if this is an external call. (0 or odd
numbers can't ever be valid xts.) In other cases it's a normal
Mops xt, which is the addr of the flag bytes of a definition. The
code starts 2 bytes later.
c#P etc are the number of named parms etc. for the called word.
*)
debug? if
." equalize_for_call called - cstk:" cr printall: cstk
then
\ true -> equalizing?
0 -> c#P 0 -> c#PL
0 -> c#FP 0 -> c#FPL
false -> cLeaf? false -> cMeth? false -> float?
GPRs -> eq_regs
xt
NIF \ this is really an EXIT - just set up for equalization
debug? if
." not a call - gpr_rtn_cnt = " gpr_rtn_cnt . cr
then
get_rtn_cnts setup_normal_call
true -> adjust_stks? \ just in case
ELSE \ we're really calling
xt 1 and
IF \ external call - parms are handled differently
setup_extern_call
true -> float? \ external calls can do anything!
ELSE
\ normal Mops call - get flags, #parms and #rslts from callee
xt 2- w@ -> hndlr
xt c@ \ get flags and #results byte
dup $ 80 and 0<> -> cLeaf?
$ 10 and 0<> -> float?
xt 1+ c@ \ #parms & #locals are in this byte
dup 4 >> -> c#P
15 and -> c#PL
hndlr $ FF and $ 40 = -> cMeth?
\ methods are marked by hndlr code $BE40 or $BD40
hndlr $ BE05 = -> cEntry?
float?
IF \ we have FP flag bytes - 4 bytes, of which
\ we're currently only using the last 2, with
\ analogous meaning to the above:
4 ++> xt
xt 1+ c@
dup 4 >> -> c#FP
15 and -> c#FPL
THEN
cLeaf? \ are we calling a leaf routine?
IF
max_called_#PL c#PL max -> max_called_#PL
max_called_#FPL c#FPL max -> max_called_#FPL
c#P c#PL c#FP c#FPL setup_fast_call
ELSE
cEntry?
IF c#P c#FP
ELSE
gpr_call_cnt c#P max
fpr_call_cnt c#FP max
THEN
setup_normal_call
THEN
THEN
THEN
debug? if
." stacks set up for equalization:" cr
printall: cstk printall: cstk2
\ false -> debug?
then
set_backstop_CDP
\ can't allow recompiling of regs during equalization,
\ since we must assume callee wants them all even if
\ they haven't been used here. Also as we're setting
\ backstop_CDP, we don't need to bother putting this
\ equalization into eq_ranges, since we'd never look at
\ it anyway.
true -> equalizing?
false fix_duplicates
false equalize_depths
false equalize_refs
update_refcnts
adjust_stks? IF adjust_stks ELSE true -> adjust_stks? THEN
debug? if ." final stacks - should be the same:" cr
printall: cstk printall: cstk2
then
set_backstop_CDP \ can't hoist anything over a call, no matter
\ what!
false -> equalizing?
;
\ =========================================================
\ CONDITIONALS
\ =========================================================
(* The conditionals are different enough for the PowerPC that we'll
rewrite them - in particular, we do some work at basic block boundaries.
For >mark and <resolve, which must go with a branch instruction, we assume
the branch has just been compiled.
A >mark pushes the entire cstk, so that everything can be equalized at
>resolve time, then it pushes a check value for ?pairs. It also pushes
the branch addr onto control_stk, and a flag byte onto control_flags.
We use these separate stacks instead of the data stack, since we often
want to look at what's on them at various places when we might have
sundry items on the data stack.
Here's the bit assignments for the flag bytes in control_flags:
$01 this was pushed by <mark- i.e. this is a loop.
$02 other basic block is dead (had an unconditional EXIT)
$04 this basic block is dead
$08 extra item pushed by ?DO (otherwise we push zero)
$80 always branch (i.e. conditionally compiling on "false")
$40 never branch (i.e. conditionally compiling on "true")
For a forward "always branch" situation, >resolve actually deletes the
code being branched over (which could never be executed).
*)
false value will_skip? \ used globally. True while we're processing
\ conditionally compiled code when the
\ condition was "false".
false value cond_comp? \ only used locally - tells >MARK that
\ we're going into conditional compilation,
\ so it doesn't need to do anything since
\ at that point we've done it already.
: >MARK
debug? if
." >mark called - cond_comp? "
cond_comp? if ." true" else ." false" then cr
then
cond_comp? \ if conditional compilation, we've handled it already
IF false -> cond_comp?
EXIT
THEN
adjust_stks \ can't have a non-zero stk adjustment here either
save: cstk save: fcstk \ save cstk and fcstk on data stk
120 \ for ?pairs
CDP dup -> basic_block_start
4- push: control_stk
0 push: control_flags
;
: >RESOLVE { chk \ branchCDP -- }
debug? if
." >resolve called - control_stk:" printall: control_stk
then
chk 120 ?pairs
adjust_stks
pop: control_stk -> branchCDP
pop: control_flags drop
CDP branchCDP - branchCDP 2+ w!
branchCDP -> frNxtDP
(* Now if backstop_CDP has been moved to within the conditional section
(by a call to another word coming there, say), we need to move it
to where we are now, since we can't hoist a fetch into the middle
of a conditional section, since that would mean that it mightn't get
executed when we want it to be - not to mention that it would break
a resolved conditional branch offset.
*)
backstop_CDP branchCDP u>
IF set_backstop_CDP THEN
CDP -> basic_block_start
;
: >RESOLVE_COND_COMPILATION { \ flgs -- }
pop: control_stk -> startCDP
pop: control_flags -> flgs
debug? if
." >resolve_cond_compilation called" cr
." startCDP " startCDP .h cr
." flgs " flgs .h cr
then
flgs $ 80 =
IF \ "branch always" - i.e. delete everything from there to here
make_altered_regs_unknown \ mustn't allow matches - code to
\ be deleted!
startCDP -> CDP
startCDP -> basic_block_start
restore: fcstk restore: cstk update_refcnts
false -> will_skip?
ELSE
restore: fcstk2 restore: cstk2
THEN
debug? if
." control_flags at end of >resolve_cond_compilation" cr
printall: control_flags
then
;
: >RESOLVE&EQUALIZE ( <saved cstk state> ) { chk \ flgs branchCDP -- }
debug? if
." >resolve&equalize called - control_stk:" printall: control_stk
." control_flags:" printall: control_flags
." cstk:" printall: cstk
then
chk 120 ?pairs
adjust_stks
1 stk: control_flags -> flgs
flgs $ C0 and
IF >resolve_cond_compilation EXIT THEN
pop: control_stk -> branchCDP
pop: control_flags drop
flgs 2 and
IF \ There was an (unconditional) EXIT in the other basic block,
\ so we don't have to bother equalizing. We just resolve the
\ branch as at equalize_for_conditional above.
restore: fcstk2 restore: cstk2
\ just get rid of saved cstk & fcstk states - they're dead
debug? if
." other basic block had an EXIT - skipping equalization. cstk:" cr
printall: cstk cr
then
true
ELSE
flgs 4 and
IF \ there was an (unconditional) EXIT in THIS basic block, so
\ while we don't have to equalize, we do have to use the saved
\ cstk state.
restore: fcstk restore: cstk
debug? if
." this basic block had an EXIT - skipping equalization. cstk:" cr
printall: cstk cr
then
true
ELSE \ no EXIT
false
THEN
THEN
\ skip equalization?
IF branchCDP CDP resolve_branch
branchCDP -> frNxtDP
update_refcnts
ELSE \ do equalization
branchCDP equalize_for_conditional
\ resolves the branch as well, as it's all
\ tied in with the equalization code in a
\ nastily complex way!
THEN
branchCDP -> startCDP make_altered_regs_unknown
\ any regs altered in the conditional code
\ can't now have their values used because
\ we don't know what was executed!
backstop_CDP branchCDP u> \ see comment above in >resolve
IF set_backstop_CDP THEN
CDP -> basic_block_start
debug? if
." control_flags at end of >resolve&equalize" cr
printall: control_flags
then
;
: <MARK
debug? if
." <mark called - calling prepare_for_loop" cr
then
prepare_for_loop \ sets up cstk for what we have to
\ equalize to in the loop
save: cstk save: fcstk 121
CDP push: control_stk
1 push: control_flags \ 1 = this is a <mark, i.e. a loop
CDP -> loop_start
;
:f FIX_CONTAINING_LOOP { \ index -- }
0 -> loop_start
false -> will_skip? \ shouldn't need this, but I want to
\ be sure
size: control_stk 0EXIT \ there can't be a containing loop
size: control_stk 1- -> index
BEGIN \ loop over conditionals till we
\ maybe find a loop
index at: control_flags 1 and
IF \ got it!
index at: control_stk -> loop_start EXIT
THEN
1 --> index
index 0<
UNTIL
;f
0 value <resolve_target \ used by +LOOP
: <RESOLVE ( <saved cstk state> )
{ chk \ markCDP branchCDP svBranch -- }
debug? if
." <resolve called" cr
then
chk 121 ?pairs
\ note: not safe to do hoist_invariants here, since equalization
\ might move a reg, making the dest reg not invariant!
1 stk: control_stk dup -> markCDP -> <resolve_target
pop: control_flags drop
CDP 4- -> branchCDP
branchCDP @ -> svBranch \ save and remove the branch in case
4 --> CDP \ equalize_for_loop compiles something
markCDP equalize_for_loop
hoist_invariants \ let's try it here!
pop: control_stk -> markCDP
svBranch code, \ replace the branch
CDP 4- -> branchCDP
markCDP branchCDP - \ offset to go in branch instrn
branchCDP 2+ w!
(* Now similarly to the >resolve situation, if backstop_CDP has been moved
to within the loop (by a call to another word coming there, say), we
need to move it to where we are now. We can't hoist a fetch into the
middle of any conditional section since it mightn't get executed,
and anyway in the loop situation it would be pretty stupid to hoist
a fetch INTO a loop!!
*)
backstop_CDP markCDP u>
IF set_backstop_CDP THEN
fix_containing_loop
;
: (ELSE) { \ flgs -- }
debug? if
." (else) called - control_stk:" printall: control_stk
then
1 stk: control_stk -> startCDP
adjust_stks
make_altered_regs_unknown
1 stk: control_flags -> flgs \ control_flags will be popped by
\ >resolve_cond_compilation or >resolve
flgs $ C0 and
IF \ conditional compilation - special case. We wind up
\ the IF-stub, then set up the ELSE-stub, rather like
\ do_conditional_compilation below.
120 ?pairs
>resolve_cond_compilation
\ pop: control_flags drop \ >resolve doesn't get called from here
flgs $ C0 xor \ new flags byte
dup $ 80 = -> will_skip?
push: control_flags
CDP push: control_stk
save: cstk save: fcstk
120 \ for ?pairs
EXIT
THEN
startCDP CDP 4- =
IF \ nothing compiled in first section. We just recompile the
\ conditional branch, which is still set up in branch_instrn,
\ with the condition inverted.
4 --> CDP
invert: branch_instrn compile: branch_instrn
ELSE
$ BF090000 \ our temp code for the ELSE branch
1 stk: control_stk CDP - $ FFFF and
or code, \ until we resolve this branch, we leave the
\ offset to the original conditional branch in
\ the lo 16 bits. This allows us to adjust if
\ we end up deleting the ELSE branch.
THEN
>resolve
restore: fcstk2 restore: cstk2
save: cstk save: fcstk 120
CDP 4- push: control_stk
flgs 4 and 1 >> push: control_flags \ set "dead basic block" bit
\ for right BB
CDP -> basic_block_start
save: cstk2 save: fcstk2
restore: fcstk restore: cstk update_refcnts
;
: DO_CONDITIONAL_COMPILATION { invert? -- }
debug? if
." doing conditional compilation" cr
printall: cstk printall: control_stk
then
1 operands
adjust_stks
lit: opnd1 0<> invert? xor
IF $ 80 true -> will_skip?
ELSE $ 40
THEN
push: control_flags
CDP push: control_stk \ dummy
save: cstk save: fcstk
120 \ for ?pairs
true -> cond_comp? \ inhibits >MARK from doing anything, since
\ we've done it all already. Note we haven't
\ compiled any branch.
;
: PIF { invert? \ flgs -- }
debug? if
." PIF called" cr printall: cstk printall: fcstk cr
then
1 stk: cstk
refType: cstk litRef =
IF invert? do_conditional_compilation EXIT THEN
refType: cstk CRref <>
IF \ not in a CR field - get it there
" 0<>" evaluate \ easy!
THEN
1 operands
adjust_stks
opnd1 invert? setup_conditional_branch
reg: opnd1 dup select: CRs
NIF false -> using_CR0 THEN
CDP put: ivar> lastRefCDP in CRs
free: opnd1
compile: branch_instrn
;